import re
from PyPDF2 import PdfReader

## Fix common errors in JSON strings
# Spans that matter while repairing JSON-like text. Complete string literals
# are matched as a whole so that their contents are never mistaken for
# structure; everything that is not matched is copied through unchanged.
_JSON_REPAIR_TOKEN = re.compile(
    r'"(?:\\.|[^"\\])*"'     # complete double-quoted string
    r"|'(?:\\.|[^'\\])*'"    # complete single-quoted string
    r"|,\s*(?=[}\]])"        # comma separated from } or ] only by whitespace
    r"|(?<!\\)'",            # stray single quote (no complete string starts here)
    re.DOTALL,
)

# Inside a string body: a backslash escape pair, or a bare double quote.
_JSON_STRING_PART = re.compile(r'\\.|"', re.DOTALL)


def _requote_json_string_part(match):
    """Rewrite one piece of a string body so it is valid between double quotes."""
    part = match.group(0)
    if part == '"':
        # A bare double quote can only occur inside a single-quoted string;
        # it must be escaped once the delimiters become double quotes.
        return '\\"'
    if part == "\\'":
        # JSON has no \' escape; a plain apostrophe is the valid spelling.
        return "'"
    return part


def _repair_json_token(match):
    """Return the repaired replacement for one token found by the scanner."""
    token = match.group(0)
    if token[0] == ',':
        # Trailing comma (plus the whitespace after it) before } or ].
        return ''
    if token == "'":
        # Unbalanced single quote: keep the best-effort conversion to ".
        return '"'
    body = _JSON_STRING_PART.sub(_requote_json_string_part, token[1:-1])
    return '"' + body + '"'


def fix_json_string(json_string):
    """Repair common mistakes in JSON-like text and return the fixed text.

    Two kinds of mistakes are repaired:

    * single-quoted strings are rewritten as double-quoted strings, and
    * trailing commas directly before ``}`` or ``]`` (optionally separated
      by whitespace) are removed.

    The text is scanned string-literal by string-literal, so the contents
    of string values are left intact: an apostrophe inside a double-quoted
    string stays an apostrophe, and a ``,}`` or ``,]`` sequence inside a
    string is not treated as a trailing comma. When a single-quoted string
    is converted, double quotes inside it are escaped, and ``\\'`` (not a
    legal JSON escape) becomes a plain apostrophe.

    Args:
        json_string: The text to repair.

    Returns:
        The repaired text. The result is not validated; other problems
        (for example unquoted keys) are left as they are.
    """
    return _JSON_REPAIR_TOKEN.sub(_repair_json_token, json_string)

def deduplicate_dict_list(dict_list):
    """Return the distinct dictionaries from ``dict_list``.

    Two dictionaries count as duplicates when they hold the same key/value
    pairs, regardless of the order in which the keys were inserted. The
    first occurrence of each distinct dictionary is kept, and the result
    follows the order of first appearance in the input.

    Args:
        dict_list: Iterable of dictionaries whose values are hashable.

    Returns:
        A new list containing a shallow copy of each distinct dictionary.
        The input list and its dictionaries are not modified.

    Raises:
        TypeError: If any dictionary contains an unhashable value
            (for example a list or another dict).
    """
    seen = set()
    unique = []
    for entry in dict_list:
        # frozenset ignores insertion order, so equal dicts share one key.
        fingerprint = frozenset(entry.items())
        if fingerprint in seen:
            continue
        seen.add(fingerprint)
        unique.append(dict(entry))
    return unique

from functools import lru_cache

@lru_cache(maxsize=32)
def get_pdf_reader(file_path: str):
    """Return a ``PdfReader`` for ``file_path``, reusing cached readers.

    Results are memoized per ``file_path`` string, with at most 32 entries
    kept in the cache. A repeated call with the same path returns the
    reader object created earlier instead of constructing a new one, so
    the file is not re-read by this function while its entry is cached.
    """
    reader = PdfReader(file_path)
    return reader
